#!/usr/bin/python3
# coding=utf-8
import sys
import json
import random


def bwToValue(bw):
    """Convert a bandwidth value such as ``'1.5MB/s'`` into a byte count.

    The largest unit suffix found in the text (PB down to B) selects a
    power-of-1024 multiplier; the text before that suffix is the number.

    Args:
        bw: Bandwidth as text (``'12.0KB/s'``, ``'0'``) or as a bare number.

    Returns:
        ``0`` for ``'0'``/``0`` and for text that cannot be parsed, otherwise
        the value in bytes as a float. A value without any unit suffix is
        taken to already be in bytes.
    """
    units = ("B", "KB", "MB", "GB", "TB", "PB")
    # Work on the string form so numeric input does not break the `in` test.
    text = str(bw)
    if text == '0':
        return 0
    # Scan from the largest unit down: every suffix contains "B", so checking
    # "B" first would match all of them.
    for exp in range(len(units) - 1, -1, -1):
        unit = units[exp]
        if unit in text:
            number, scale = text.split(unit)[0], pow(1024, exp)
            break
    else:
        # No unit suffix at all: interpret the text as a plain byte count.
        number, scale = text, 1
    try:
        return float(number) * scale
    except ValueError:
        # Previously this case fell through and returned None, which made the
        # caller's addition fail; report "no traffic" instead.
        return 0


def humConvert(value):
    """Format a byte rate as a human-readable string such as ``'1.5MB/s'``.

    The value is divided by 1024 until it drops below one unit step, then
    rendered with one decimal place and the matching unit.

    Args:
        value: Rate in bytes (int or float).

    Returns:
        A string of the form ``'<number><unit>/s'``. Zero yields
        ``'0.0B/s'``. Values of 1024**6 and above are expressed in PB.
    """
    units = ("B", "KB", "MB", "GB", "TB", "PB")
    size = 1024.0
    last = len(units) - 1

    # Zero used to be returned as the int 0 and very large values fell off
    # the loop as None; callers in this file call .replace() on the result,
    # so a string is returned in every case.
    for idx, unit in enumerate(units):
        if (value / size) < 1 or idx == last:
            return "%.1f%s/s" % (value, unit)
        value = value / size


def ioutilDataPaser(data, resultInfo):
    """Fold one sample's per-task records into ``resultInfo``.

    Each entry of ``data['mstats']`` is keyed by ``comm:pid:device``. For
    every key the highest combined IOPS and bandwidth seen so far are kept,
    together with the ``file`` (and ``bufferio`` list, when present) of the
    sample that last raised one of those maxima.

    Args:
        data: Parsed sample; must contain an ``'mstats'`` list whose items
            provide ``iops_rd``, ``iops_wr``, ``bps_rd``, ``bps_wr``,
            ``comm``, ``pid``, ``device`` and ``file``.
        resultInfo: Dict updated in place with the per-task maxima.

    Returns:
        Tuple ``(totalIops, totalBw, tUnit)``: the sample's summed IOPS, its
        summed bandwidth in bytes, and the text after ``'/'`` in the first
        rate string that has one (``None`` if no record carries a unit).
    """
    tUnit = None
    totalBw = totalIops = 0
    for ds in data['mstats']:
        iops = ds['iops_rd'] + ds['iops_wr']
        bps = bwToValue(ds['bps_wr']) + bwToValue(ds['bps_rd'])
        totalBw += bps
        totalIops += iops

        if not tUnit:
            # A rate of '0' has no '/unit' part, so take the unit from
            # whichever direction actually carries one.
            for field in ('bps_wr', 'bps_rd'):
                pieces = str(ds[field]).split('/')
                if len(pieces) > 1:
                    tUnit = pieces[1]
                    break

        key = '%s:%s:%s' % (ds['comm'], ds['pid'], ds['device'])
        entry = resultInfo.setdefault(
            key,
            {'disk': ds['device'], 'maxIops': 0, 'maxBps': 0,
             'file': ds['file']})

        # Decide whether this sample sets a new peak before updating the
        # maxima; the report shows 'file'/'bufferio' next to those peaks.
        newPeak = bps > entry['maxBps'] or iops > entry['maxIops']
        entry['maxBps'] = max(bps, entry['maxBps'])
        entry['maxIops'] = max(iops, entry['maxIops'])
        if newPeak:
            entry['file'] = ds['file']
            # Drop details recorded for an earlier, lower sample so the ones
            # from this sample (if any) replace them below.
            entry.pop('bufferio', None)
        if 'bufferio' in ds and 'bufferio' not in entry:
            entry['bufferio'] = ds['bufferio']
    return totalIops, totalBw, tUnit


def ioutilReport(resultInfo, tUnit, diagret):
    """Build the diagnosis entry listing tasks by peak bandwidth.

    Args:
        resultInfo: Dict keyed by ``comm:pid:device`` whose values hold
            ``maxBps``, ``maxIops``, ``disk``, ``file`` and optionally
            ``bufferio``.
        tUnit: Text substituted for every ``'s'`` in the formatted rate;
            ``None`` leaves the rate as formatted.
        diagret: Text stored unchanged under ``'result'``.

    Returns:
        A one-element list holding a dict with ``'result'``, ``'reason'``
        (one numbered line per task, highest ``maxBps`` first) and
        ``'solution'``.
    """
    # Without a unit, replacing 's' by 's' keeps the formatted rate as is.
    unit = tUnit or 's'
    suggestPS = reason = ''
    ranked = sorted(
        resultInfo.items(), key=lambda e: e[1]['maxBps'], reverse=True)
    for top, (key, val) in enumerate(ranked, 1):
        file = ', target file:'+str(val['file']) if val['file'] != '-' else ''
        if 'kworker' in str(key):
            # For kworker entries, list the tasks recorded under 'bufferio',
            # skipping those whose write bandwidth is only in the KB range.
            kTasklist = []
            for i in val.get('bufferio') or []:
                if 'KB' in i["Wrbw"]:
                    continue
                kTasklist.append(i['task'])
                file += ('%s Wrbw %s disk %s file %s;' %
                         (i['task'], i["Wrbw"], i["device"], i["file"]))
            if kTasklist:
                file = '(Write bio from: '+file+')'
            if top == 1:
                suggestPS = '(Found \'kworker\' flush dirty pages, Try to reduce'\
                    ' the buffer-IO write?%s or check the config /proc/sys/vm/'\
                    '{dirty_ratio,dirty_background_ratio} too small?)' % (
                        '('+';'.join(kTasklist)+')' if kTasklist else '')
        # str() guards against a formatter that hands back a non-string
        # (e.g. a bare 0 for an idle task) before .replace() is applied.
        maxBps = str(humConvert(val['maxBps'])).replace('s', unit)
        task = str(key.rsplit(':', 1)[0])
        reason += ('%d. task[%s], access disk %s with iops:%s, bps:%s%s; \n' % (
            top, task, str(val['disk']), str(val['maxIops']), maxBps, file))
        if top == 1 and suggestPS == '':
            suggestPS = '(Found task \'%s\')' % task
    suggest = \
        'Optimize the tasks that contributes the most IO flow%s' % suggestPS
    return [{'result': diagret, 'reason': reason, 'solution': suggest}]


def dataToSummary(raw):
    """Summarise every ``mstats`` sample found in ``raw``.

    ``raw`` is scanned line by line; each line that parses as a JSON object
    with an ``'mstats'`` key is folded into per-task maxima, and the
    smallest and largest per-sample IOPS and bandwidth totals are tracked.

    Args:
        raw: Text with one JSON document per line.

    Returns:
        The list produced by ``ioutilReport`` when at least one task was
        recorded, otherwise ``None``.
    """
    resultInfo = {}
    maxIops = maxBw = 0
    minIops = minBw = sys.maxsize
    tUnit = None

    for data in raw.split('\n'):
        if "mstats" not in str(data):
            continue
        try:
            stat = json.loads(data)
        except ValueError:
            # Skip a damaged line instead of discarding every other sample.
            continue
        if not isinstance(stat, dict) or 'mstats' not in stat:
            # The word appeared somewhere in the line, but not as a record.
            continue
        iops, bw, unit = ioutilDataPaser(stat, resultInfo)
        # A sample without any task reports no unit; keep the last known one.
        tUnit = unit or tUnit
        maxIops = max(maxIops, iops)
        minIops = min(minIops, iops)
        maxBw = max(maxBw, bw)
        minBw = min(minBw, bw)

    if not resultInfo:
        return None

    # Replacing 's' by 's' leaves the formatted rate untouched when no unit
    # was seen; str() tolerates a formatter that returns a bare number.
    unit = tUnit or 's'
    content = 'Iops:'+str(minIops)+'~'+str(maxIops) +\
        ', Bps:'+str(humConvert(minBw)).replace('s', unit) +\
        '~'+str(humConvert(maxBw)).replace('s', unit)
    diagret = 'detect ('+content+')'
    return ioutilReport(resultInfo, tUnit, diagret)


def getDisksFromDiskstats(diskstats):
    """Collapse the device names in ``diskstats`` to one name per disk.

    Two names are treated as the same disk when one is a substring of the
    other (for example ``sda`` and ``sda1``); the shorter name is kept.

    Args:
        diskstats: Iterable of dicts that each provide ``'diskname'``.

    Returns:
        List of the retained names, in order of first appearance. A parent
        name takes the position of the first of its partitions seen earlier.
    """
    disks = []
    for o in diskstats:
        name = o['diskname']
        if name in disks:
            continue

        # Build the next list separately rather than editing `disks` while
        # looping over it: removing in place made the loop skip the entry
        # after each removal, leaving stray partitions behind.
        merged = []
        placed = False   # `name` already stands in for one of its partitions
        covered = False  # a parent of `name` is already listed
        for pos, d in enumerate(disks):
            if name in d:
                # `d` is a partition of `name`: the first one is replaced by
                # `name`, any further ones are dropped.
                if not placed:
                    merged.append(name)
                    placed = True
                continue
            if d in name:
                # `name` is a partition of `d`: keep the rest unchanged.
                covered = True
                merged.extend(disks[pos:])
                break
            merged.append(d)
        if not (placed or covered):
            merged.append(name)
        disks = merged
    return disks


def iofsstatJoinData(raw):
    """Turn raw line-delimited JSON stats into one report printed as JSON.

    The printed object has ``code``, ``err_msg`` and ``result``. When ``raw``
    does not mention ``diskstats`` at all, ``code`` is 1 and ``err_msg``
    echoes ``raw``. Otherwise ``result`` holds:

    * ``disks``: the disk list derived from the latest ``diskstats`` record;
    * ``overview`` / ``summary``: the output of ``dataToSummary``;
    * ``diskIOstat_<disk>``: the ``diskstats`` rows belonging to each disk;
    * ``taskIOstat_<disk>`` and ``taskIOblocksize_<disk>``: the ``mstats``
      rows for each disk, with keys containing ``'pat'`` moved into the
      block-size table as percentages of ``iops_wr``.

    Args:
        raw: Text with one JSON document per line.

    Returns:
        None. The report is written to stdout.
    """
    postprocess_result = {
        "code": 0,
        "err_msg": "",
        "result": {}
    }
    if raw.find("diskstats") == -1:
        postprocess_result['code'] = 1
        postprocess_result['err_msg'] = f"Diagnosis failed: \n {raw}"
        print(json.dumps(postprocess_result, indent=4))
        return
    stat = {}
    stat["disks"] = {"data": []}
    stat["overview"] = {"data": []}
    stat["overview"]["data"] = dataToSummary(raw)
    stat["summary"] = "diagnose results: No IO traffic detected"
    if stat["overview"]["data"]:
        stat["summary"] = \
            "diagnose results: %s, caused by \n%ssolution:%s" % (
            stat["overview"]["data"][0]['result'],
            stat["overview"]["data"][0]['reason'],
            stat["overview"]["data"][0]['solution'],
        )

    # Start with no disks so an 'mstats' record that arrives before the
    # first 'diskstats' record is skipped instead of hitting an unbound name.
    disks = []
    for s in raw.split('\n'):
        try:
            obj = json.loads(s)
        except ValueError:
            continue
        if not isinstance(obj, dict):
            continue
        # Dispatch on the record's own keys rather than on a substring of
        # the line, so text that merely mentions a key is not indexed.
        if "diskstats" in obj:
            disks = getDisksFromDiskstats(obj["diskstats"])
            stat["disks"]["data"] = \
                [{'key': pos, 'value': d} for pos, d in enumerate(disks)]
            for d in disks:
                stat.setdefault("diskIOstat_"+d, {"data": []})
                for o in obj["diskstats"]:
                    if d in o['diskname']:
                        stat["diskIOstat_"+d]["data"].append(o)
        elif "mstats" in obj:
            for d in disks:
                if ("taskIOstat_"+d) not in stat:
                    stat["taskIOstat_"+d] = {"data": []}
                    stat["taskIOblocksize_"+d] = {"data": []}

                # enumerate gives the row's own position; looking it up by
                # value could hit an earlier, equal row.
                for idx, o in enumerate(obj["mstats"]):
                    if d not in o['device']:
                        continue
                    pat = {'comm': o['comm'], 'tgid:pid': '-:'+o['pid']}
                    patKey = []
                    for key, value in o.items():
                        if 'pat' in key:
                            if o['iops_wr'] == 0 or value == 0:
                                pat[key] = '0'
                            else:
                                pat[key] = \
                                    format(
                                        value/(o['iops_wr']*1.0)*100, '.2f')+'%'
                            patKey.append(key)
                    # The pattern counters live only in the block-size table.
                    for key in patKey:
                        del o[key]
                    stat["taskIOblocksize_"+d]["data"].append(pat)

                    o['file'] = str(o['file'])
                    if 'bufferio' in o:
                        # Expose the bufferio entries as 'children', adding
                        # the same column names the parent rows use.
                        o['children'] = o['bufferio']
                        for e in o['children']:
                            task = e['task'].rsplit(':', 2)
                            e['comm'] = task[0]
                            e['tgid:pid'] = task[1]+':'+task[2]
                            e['bps_wr'] = e['Wrbw']
                        del o['bufferio']
                    # Replace the row's second field with 'tgid:pid'.
                    # NOTE(review): this assumes 'pid' is the second key of
                    # every mstats row - confirm against the producer.
                    new = list(o.items())
                    new[1] = ('tgid:pid', '-:'+o['pid'])
                    obj["mstats"][idx] = dict(new)
                    stat["taskIOstat_"+d]["data"].append(obj["mstats"][idx])
    postprocess_result['result'] = stat
    s = json.dumps(postprocess_result, indent=4)
    print(s)


def extract_params():
    """Read the input file named on the command line.

    ``sys.argv[1]`` is the path of the file to read and ``sys.argv[2]`` is
    the task id; both are looked up before the file is opened.

    Returns:
        Tuple ``(content, task_id)`` with the file's full text and the task
        id string.
    """
    path = sys.argv[1]
    task_id = sys.argv[2]
    with open(path, 'r') as handle:
        content = handle.read()
    return content, task_id


if __name__ == "__main__":
    # Only the file contents are needed here; the task id is not used.
    iofsstatJoinData(extract_params()[0])
